**********************************************************************************************************************
* PPI Data
**********************************************************************************************************************
* Start from an empty dataset and switch off output paging for the whole run.
clear
set more off

* Folder layout. Both folder globals below are built from the rootfolder global,
* which is not set anywhere in the visible part of this file (the template line
* below is commented out).
* NOTE(review): presumably rootfolder is defined by a calling do-file or has to be
* filled in by hand before running - confirm. If it is empty the paths silently
* resolve relative to the drive root.
*global rootfolder ""
global ppifolder "$rootfolder\data\1_PPI"
global concordancefolder "$rootfolder\data\0_Concordances&OtherAuxiliaryFiles\output"
* Create the output folder (capture swallows the error raised when it already
* exists) and make it the working directory: all helpfiles and final datasets
* below are saved and re-read with relative file names.
capture mkdir "$ppifolder\output"
cd "$ppifolder\output"

* Section switch: the Eurostat part below only runs when this global equals 1.
global EUROSTAT = 1
if $EUROSTAT == 1 {

*-----------------------------------------------------------------------------------------------------------
*1.1: PPI data Eurostat (EUST)
*-----------------------------------------------------------------------------------------------------------

*-------------------------------------------------------------------------------------------------
*1.1.0.1: prepare before insheeting (using a text-editor/excel)
*-------------------------------------------------------------------------------------------------

*-------------------------------------------------------------------------------------------------
*1.1.0.2: recover observations from old version 
*-------------------------------------------------------------------------------------------------

***insheet the outdated (2011) Eurostat vintage:
* Purpose of this step: build a helpfile with the old index (ppi_old) per
* geo / nace / month, which is later joined to the current vintage to fill series
* that are empty there.
clear
insheet using "$ppifolder\sts_inppd_m_(outdated2011).txt", tab names

***drop unnecessary vars:
drop indic_bt s_adj

***destring and reshape month:
set more off
* the ignore() list strips the flag characters and the ":" placeholder, so flagged
* cells become plain numbers and empty cells become missing
destring y2010m08-y1975m01, replace ignore("p" "s" "c" "ei" "e" ":" "cp" "ce")
*keep if geo == "IE" | geo == "DK" | geo == "BE" |  geo == "FR" |  geo == "UK" |  geo == "IE" |  geo == "PT" 
reshape long y, i(geo nace) j(month) string
rename y ppi_old
rename month tmp_month

***edit nace:
drop if strmatch(nace,"*_*") == 1 | strmatch(nace,"*-*") == 1 // drop the higher than 2-digit aggregates
drop if substr(nace,2,1)=="" // drop the single letter aggregates
replace nace = substr(nace,2,.) // strip the leading section letter
* insert the dot after the first two digits (e.g. 1011 becomes 10.11)
gen tmp_nace = substr(nace,1,2) + "." + substr(nace,3,.)
drop if substr(tmp_nace,-2,.) == ".0"
drop if geo == "EA16" | geo == "EU27" | geo == "EU15"
replace nace = tmp_nace

***harmonise the country code with the current vintage:
* The current vintage recodes EL to GR before it is joined with this helpfile on
* geo. Without the same recode here the old Greek series can never match; they
* would come back as using-only rows and end up as a separate country "EL".
* This is a no-op if the file contains no EL rows.
replace geo = "GR" if geo == "EL"

***time variables and log change of the old index:
gen month = monthly(tmp_month,"YM")
gen year = real(substr(tmp_month,1,4))
egen tmp_ID = group(geo nace) 
xtset tmp_ID month, m
gen ppi_chng_old = ln(ppi_old/l1.ppi_old)

drop tmp_*
compress
save 1.1.0.2_PPI_Eurostat_helpfile_0.dta, replace

*-------------------------------------------------------------------------------------------------
*1.1.1: insheet, drop unnecessary vars, destring, reshape, edit nace
*-------------------------------------------------------------------------------------------------

***read the current Eurostat extract:
clear
insheet using "$ppifolder\sts_inppd_m.csv", delim(";")

* Row 1 holds the column headers. They are used as variable names with a leading
* "y", because a Stata variable name may not start with a digit (the month
* columns do). Row 1 is dropped afterwards.
foreach col of varlist _all {
	local hdr = `col'[1]
	rename `col' y`hdr'
}
drop in 1
* the two id columns get their plain names back
rename ygeo geo
rename ynace_r2 nace_r2

***columns that are not needed:
drop yindic_bt ys_adj

***numeric values, then one row per geo / nace / month:
set more off
destring y2015M03-y1975M01, replace ignore("p" "s" "c" "ei" "e" ":" "cp" "ce")
reshape long y, i(geo nace_r2) j(month) string
rename y ppi

***year and continuous month (1960m1 = 0) from the month string:
gen year = real(substr(month,1,4))
gen tmp_mon = real(substr(month,6,2))
drop month
gen month = 12*(year-1960) + (tmp_mon-1)
drop tmp_mon

***edit nace:
drop if strmatch(nace_r2,"*_*") == 1 // aggregates above the 2-digit level
drop if strmatch(nace_r2,"*-*") == 1 // aggregates above the 2-digit level
drop if substr(nace_r2,2,1)=="" // single letter aggregates
* strip the leading letter and put a dot after the first two digits in one go
replace nace_r2 = substr(nace_r2,2,2) + "." + substr(nace_r2,4,.)

***recode Greece from EL to GR:
replace geo = "GR" if geo == "EL"
compress
save tmp_1.1.1file.dta, replace


use tmp_1.1.1file.dta, clear
*erase tmp_1.1.1file.dta

***fill gaps in the current vintage with values from the old vintage:
* unmatched(both) keeps rows that exist in only one of the two vintages
joinby geo nace_r2 month year using 1.1.0.2_PPI_Eurostat_helpfile_0.dta, unmatched(both)

* tmp_m    = 1 where the current index is missing
* tmp_repl = 1 for a geo / nace series in which every row is missing
gen tmp_m = (ppi == .)
bysort geo nace_r2: egen tmp_sum = total(tmp_m)
bysort geo nace_r2: gen tmp_repl = (tmp_sum == _N)

* rebasing factor: mean ratio new/old over the months where both are observed
* (stays missing when the two vintages do not overlap)
gen tmp_nbas = ppi/ppi_old if ppi != . & ppi_old != .
bysort geo nace_r2: egen tmp_newbas = mean(tmp_nbas)

* candidate rows: present in the old vintage, old value observed, new value missing
gen byte tmp_fill = (_merge != 1) & (ppi_old != .) & (tmp_m == 1)

* case 1: series completely empty in the current vintage, copy without rebasing
replace ppi = ppi_old if tmp_fill & tmp_repl == 1

* case 2: partly empty series with overlap, copy the rebased old value
replace ppi = ppi_old*tmp_newbas if tmp_fill & tmp_repl == 0 & tmp_newbas != .

* case 3: partly empty series without overlap, rebasing is impossible, plain copy
replace ppi = ppi_old if tmp_fill & tmp_repl == 0 & tmp_newbas == . & ppi == .

drop tmp_* _merge ppi_old


***save as helpfile:
compress
save 1.1.1_PPI_Eurostat_helpfile_1.dta, replace


*-------------------------------------------------------------------------------------------------
*1.1.2: convert from NACE rev. 2 to NACE rev. 1.1
*-------------------------------------------------------------------------------------------------
*note:	*do for all levels (4-, 3- and 2-digits)
		*use joinby and not merge
		*don't use the factor variable of the concordance table in here as it is PPI

***joinby the 4-, 3- and 2-digit concordance tables, one pass per digit level:
* table 0.4.2 is the 4-digit one, 0.4.3 the 3-digit one, 0.4.4 the 2-digit one;
* the outputs are numbered 1.1.2.1 to 1.1.2.3 in the same order.
* concordance stores the digit level of the table and is used below to pick
* one row per key.
local tbl = 1
foreach d in 4 3 2 {
	local ++tbl
	local out = `tbl' - 1
	use 1.1.1_PPI_Eurostat_helpfile_1.dta, clear
	sort nace_r2
	joinby nace_r2 using "$concordancefolder\0.4.`tbl'_concordances_nace-r2-`d'digit_to_nace-r1-1-`d'digit.dta"
	generate concordance = `d'
	compress
	save 1.1.2.`out'_PPI_Eurostat_helpfile_`d'digit.dta, replace
}

***stack the three levels:
use 1.1.2.1_PPI_Eurostat_helpfile_4digit.dta, clear
append using 1.1.2.2_PPI_Eurostat_helpfile_3digit.dta
append using 1.1.2.3_PPI_Eurostat_helpfile_2digit.dta

***average to one value per geo / nace_r1_1 / month and concordance level:
collapse (mean) ppi, by(geo nace_r1_1 year month concordance)

***keep one row per geo / nace_r1_1 / month:
* Why: the 4-to-4 and 3-to-3 digit tables sometimes cannot map to a code of the
* same depth and map to a shallower NACE rev. 1.1 code instead. The same
* shallower code can therefore arrive both directly (from the table of its own
* depth) and indirectly (converted from a deeper level). The direct value is
* preferred; the indirect one is only kept when no direct value exists.
* How: within each key the rows are ordered by concordance and the first one,
* that is the one from the lowest-digit table, is kept.
* The same strategy is used again in the next section.
bysort geo nace_r1_1 year month (concordance): keep if _n == 1
drop concordance

***save as helpfile:
compress
sort geo nace_r1_1 year month
save 1.1.2_PPI_Eurostat_helpfile_2.dta, replace
	*note: this file contains all the direct data we have

*-------------------------------------------------------------------------------------------------
*1.1.3: generate more information on lower NACE levels
*-------------------------------------------------------------------------------------------------
*note:	*we want to have all available info for all digit-levels (2, 3 and 4)
		*to do this I generate 2- and 3-digit info from 4 digit observations as well as 2-digit
			*info from 3-digit observations
		*as comparably done above, I check whether we already have this info and if we already
	 		*have it I prefer the more direct info (I use the variable: steps)
 
* This section adds aggregated 3- and 2-digit series built from deeper levels.
* steps counts how many digit levels a value was aggregated over (0 = direct
* data); for every key the row with the fewest steps is kept.
* Code layout assumed by the substr tests: 4-digit codes have 5 characters
* (12.34), 3-digit codes 4 (12.3) and 2-digit codes 3 (12.).

***put a steps variable in the dataset:
use 1.1.2_PPI_Eurostat_helpfile_2.dta, clear
generate steps = 0							// direct data
compress
save 1.1.3.1_PPI_Eurostat_helpfile_allDigits.dta, replace

***generate 3-digit info from the 4-digit observations:
use 1.1.2_PPI_Eurostat_helpfile_2.dta, clear
drop if substr(nace_r1_1,5,1) == ""			// drop all 3- and 2-digit cases
replace nace_r1_1 = substr(nace_r1_1,1,4)	// replace all to 3-digits
collapse ppi, by(geo nace_r1_1 year month)
generate steps = 1
compress
save 1.1.3.2_PPI_Eurostat_helpfile_add_3digit.dta, replace

***generate 2-digit info from the 4-digit observations:
use 1.1.2_PPI_Eurostat_helpfile_2.dta, clear
drop if substr(nace_r1_1,5,1) == ""			// drop all 3- and 2-digit cases
replace nace_r1_1 = substr(nace_r1_1,1,3)	// replace all to 2-digits
collapse ppi, by(geo nace_r1_1 year month)
generate steps = 2
compress
save 1.1.3.3_PPI_Eurostat_helpfile_add_2digit_1.dta, replace

***generate 2-digit info from the 3-digit observations:
use 1.1.2_PPI_Eurostat_helpfile_2.dta, clear
drop if substr(nace_r1_1,5,1) != ""			// drop all 4-digit cases
drop if substr(nace_r1_1,4,1) == ""			// drop all 2-digit cases
replace nace_r1_1 = substr(nace_r1_1,1,3)	// replace all to 2-digits
collapse ppi, by(geo nace_r1_1 year month)
generate steps = 1
compress
save 1.1.3.4_PPI_Eurostat_helpfile_add_2digit_2.dta, replace

***append all the files:
use 1.1.3.1_PPI_Eurostat_helpfile_allDigits.dta, clear
append using 1.1.3.2_PPI_Eurostat_helpfile_add_3digit.dta
append using 1.1.3.3_PPI_Eurostat_helpfile_add_2digit_1.dta
append using 1.1.3.4_PPI_Eurostat_helpfile_add_2digit_2.dta

***drop the unnecessary info:
* per key keep the row with the smallest steps value, i.e. the most direct one
bysort geo nace_r1_1 year month (steps): keep if _n == 1
drop steps

***generate a variable indicating whether the info is original or generated:
* Modern merge syntax replaces the deprecated sorted merge and matches the
* merge 1:1 calls used in the BLS and Statistics Canada sections. The key is
* unique on both sides by construction (master: dedup above, using: collapsed
* and deduplicated in section 1.1.2), and 1:1 makes Stata stop with an error if
* that ever ceases to hold instead of silently pairing rows.
merge 1:1 geo nace_r1_1 year month using 1.1.2_PPI_Eurostat_helpfile_2.dta
gen original = 0 if _merge==1				// only in the extended set: generated
replace original = 1 if _merge==3			// also in the direct data: original
drop _merge
sort geo nace_r1_1 year month

***save as helpfile:
compress
save 1.1.3_PPI_Eurostat_helpfile_3.dta, replace

*-------------------------------------------------------------------------------------------------
*1.1.4. final edit to unify with other data
*-------------------------------------------------------------------------------------------------

use 1.1.3_PPI_Eurostat_helpfile_3.dta, clear

***country column:
rename geo country
replace country = "GB" if country == "UK" // important - ISO is at times UK and at times GB

***constant source and (empty) currency columns:
generate source = 1
generate currency = .

***variable labels:
* NOTE(review): the source and currency labels are taken from globals that are
* not defined in the visible part of this file - confirm they are set elsewhere,
* otherwise both labels are empty.
label variable country   "Country"
label variable nace_r1_1 "NACE rev. 1.1"
label variable year      "Year"
label variable month     "Month (contin., 1960/01 = 0)"
label variable ppi       "PPI, monthly (monthly average)"
label variable original  "1 if original, 0 if generated"
label variable source    "$source"
label variable currency  "$currency"

*-----------------------------------------------------
compress
sort country nace_r1_1 year
save 1.1_PPI_Eurostat_NACE.dta, replace
*-----------------------------------------------------

}
* end of EUROSTAT


************************************************************************************************************
************************************************************************************************************
************************************************************************************************************
*-----------------------------------------------------------------------------------------------------------
*1.2: PPI data Bureau of Labor Statistics (BLS)
*-----------------------------------------------------------------------------------------------------------
************************************************************************************************************
************************************************************************************************************
************************************************************************************************************
* Section switch: the BLS part below only runs when this global equals 1.
global BLS = 1

if $BLS == 1 {

*-------------------------------------------------------------------------------------------------
*1.2.1: insheet and edit
*-------------------------------------------------------------------------------------------------

clear

* different from the SNB version: the bigger compressed data file is used because
* it has values further back in time
insheet using "$ppifolder\bls.pc.date201513"

* each row arrives as one string; split it into its whitespace-separated fields
split v1, gen(stub)
drop v1

* field layout used below: 1 = series id, 2 = year, 3 = yearly average,
* 4 to 15 = the twelve monthly values
rename stub1 series_id
rename stub2 year
rename stub3 yearavg
forvalues k = 1/12 {
	local src = `k' + 3
	rename stub`src' m`k'
}

*rows that start with T don't have observations
drop if substr(series_id,1,1)=="T"

* the yearly average is not used
drop yearavg

* cut the last character off every monthly value
foreach v of varlist m* {
	replace `v' = substr(`v',1,length(`v')-1)
}

* cut the first character off the series id and keep the PCU series only
replace series_id = substr(series_id,2,.)
keep if substr(series_id,1,3)=="PCU"

destring m* , replace float
destring year, replace

* one row per series / year / month
rename series_id naics 
reshape long m, i(naics year) j(mon) string

rename m ppi
drop if inlist(ppi, 0, 1)
destring mon, replace

gen month = ym(year, mon)
format %tm month
drop mon

* manual changes (there are some series with crazy values, like inflation of -100%)
replace ppi = 240 if inlist(naics, "PCU311812311812MM", "PCU311812311812M") & month == 512
drop if naics == "PCU3231133231134"  & month < 513
drop if naics == "PCU42993042993014"


sort naics year month
keep year month naics ppi

*save as helpfile
compress
save 1.2.1_PPI_BLS_helpfile_1.dta, replace

/*
clear
insheet using ///
"$ppifolder\1.2_PPI_BLS_monthly\pc.data.0.Current04.2015.txt", tab


drop footnotes
keep if substr(series_id,1,3) == "PCU"
drop if period == "M13"

*generate date variable
gen per = substr(period,2,2)
destring per, replace
gen month = ym(year,per)
format month %tm

list value if series == "PCU311812311812MM" & month == 512
list value if series == "PCU311812311812M" & month == 512

* manual changes
replace value = 240 if series == "PCU311812311812MM" & month == 512
replace value = 240 if series == "PCU311812311812M" & month == 512
drop if series == "PCU3231133231134"  & month < 513

***rename as in other data:
rename series_id naics
rename value ppi


***collapse:
sort naics year month
keep ppi naics year month

*save as helpfile
save 1.2.1_PPI_BLS_helpfile_1.dta, replace
*/


*-------------------------------------------------------------------------------------------------
*1.2.2: edit the NAICS codes
*-------------------------------------------------------------------------------------------------
*note:	*the nomenclature is : 3-letters - NAICS-code - some longer coding
		*there is 3-, 4-, 5- and 6-digit information
		*example: PCU211--- PCU2111-- PCU21111- PCU211111

* Purpose: turn the BLS series id into a plain NAICS code plus its digit level,
* compute the monthly log change per series and save the NAICS-level helpfile.
clear
use 1.2.1_PPI_BLS_helpfile_1.dta

* The id is PCU followed by the NAICS code padded with "-" (see the note above
* this section). Each level is handled by a pair of statements: the first records
* the level, the second shortens the id to the bare code using the same condition.
* The order matters: later tests run on ids that earlier pairs may already have
* shortened. A shortened code has no character at positions 7 and beyond, so it
* cannot satisfy a later test that requires a "-" or a non-empty character there.
gen naics_level = .
* 3-digit: position 7 (the 4th code position) is a dash
replace naics_level = 3 if substr(naics,7,1)=="-"
replace naics = substr(naics,4,3) if substr(naics,7,1)=="-"
* 4-digit: position 7 is not a dash, position 8 is
replace naics_level = 4 if substr(naics,7,1)!="-" & substr(naics,8,1)=="-"
replace naics = substr(naics,4,4) if substr(naics,7,1)!="-" & substr(naics,8,1)=="-"
* 5-digit: position 8 is not a dash, position 9 is
replace naics_level = 5 if substr(naics,8,1)!="-" & substr(naics,9,1)=="-"
replace naics = substr(naics,4,5) if substr(naics,8,1)!="-" & substr(naics,9,1)=="-"
* 6-digit: position 9 holds a real character and the id has at most 15 characters
replace naics_level = 6 if substr(naics,9,1)!="-" & substr(naics,9,1)!="" & substr(naics,16,1)==""
replace naics = substr(naics,4,6) if substr(naics,9,1)!="-" & substr(naics,9,1)!="" & substr(naics,16,1)==""
* ids that still have a 16th character were not shortened above and are dropped
* NOTE(review): presumably these are the product-level series below the
* industry total - confirm against the BLS series id documentation
drop if substr(naics,16,1)!=""
* drop every remaining id that contains an A, B or O
* NOTE(review): the meaning of these letters is not visible here - confirm
drop if	strmatch(naics,"*A*") == 1 | strmatch(naics,"*B*") == 1 | strmatch(naics,"*O*") == 1


*drop if naics_level>=5 // I use the 3-to-4 and 4-to-4 correspondence, not the 5-to-4
compress
* intermediate snapshot; it is not read again in the visible part of this script
save temp.dta, replace

**generate log change
* panel = one NAICS code; l1.ppi is the value of the previous month, so dppi is
* missing for the first month of a series and after every gap
egen id = group(naics)
xtset id month
gen dppi = ln(ppi)-ln(l1.ppi)


***generate a variable indicating whether the info is original or generated:
* everything in this file is direct BLS data
gen original = 1

***save as helpfile:
rename naics naics_2002
compress
save 1.2.2_PPI_BLS_helpfile_2.dta, replace


*-------------------------------------------------------------------------------------------------
*1.2.3: convert from NAICS 2002 to NACE rev. 1.1
*-------------------------------------------------------------------------------------------------

use 1.2.2_PPI_BLS_helpfile_2.dta, clear


***convert to NACE rev. 1.1:
* one concordance table per NAICS digit level (3 to 6)
local conc3 "$concordancefolder\0.2.7_concordances_naics-2002-3digit_to_nace-r1-1-3digit.dta"
local conc4 "$concordancefolder\0.2.4_concordances_naics-2002-4digit_to_nace-r1-1-4digit.dta"
local conc5 "$concordancefolder\0.2.3_concordances_naics-2002-5digit_to_nace-r1-1-4digit.dta"
local conc6 "$concordancefolder\0.2.2_concordances_naics-2002-6digit_to_nace-r1-1-4digit.dta"

* Join all four tables in turn. Rows without a match are kept (unmatched master),
* and the NACE code delivered by each table is parked in its own helper column.
forvalues lvl = 3/6 {
	sort naics_2002
	joinby naics_2002 using "`conc`lvl''", unmatched(master)
	rename nace_r1_1 nace_lvl`lvl'
	drop _merge factor
}


***gen one nace variable and collapse:
* every row takes the code from the table that belongs to its own digit level
gen nace_r1_1 = ""
forvalues lvl = 3/6 {
	replace nace_r1_1 = nace_lvl`lvl' if naics_level == `lvl'
}

* rows that found no NACE code cannot be used
drop if nace_r1_1 == ""

* average log change per NACE code and month
collapse dppi, by(nace_r1_1 year month)



***save as helpfile:
* the identical data are stored under two names; both are read by section 1.2.4
sort nace_r1_1 year month
compress
save 1.2.3_PPI_BLS_helpfile_3a.dta, replace

save 1.2.3_PPI_BLS_helpfile_3.dta, replace
*-------------------------------------------------------------------------------------------------
*1.2.4: generate more aggregated info from less aggregated cases in NACE rev. 1.1
*-------------------------------------------------------------------------------------------------

* This section adds 3- and 2-digit NACE series aggregated from deeper levels and
* then turns the average log changes into an index.
* Code layout assumed by the substr tests: 4-digit codes have 5 characters
* (12.34), 3-digit codes 4 (12.3) and 2-digit codes at most 3.

***generate 3-digit info from 4 digit cases:
use 1.2.3_PPI_BLS_helpfile_3a.dta, clear
drop if substr(nace_r1_1,5,1) == ""			// keep the 4-digit cases only
replace nace_r1_1 = substr(nace_r1_1,1,4)	// replace all to 3-digits

collapse dppi, by(nace_r1_1 year month)


***keep the 3 digit info only if it's not available directly as 3 digit info: 
*can be the case as we've used a concordance
merge 1:1 nace_r1_1 year month using 1.2.3_PPI_BLS_helpfile_3.dta
keep if _merge==1 // rows that exist in the generated data but not in the direct data
drop _merge
generate original = 0
compress
save 1.2.3.1_PPI_BLS_helpfile_add_3digit.dta, replace

***generate 2-digit info from 3 digit cases:
use 1.2.3_PPI_BLS_helpfile_3.dta, clear //direct data
append using 1.2.3.1_PPI_BLS_helpfile_add_3digit.dta //add the newly created 3 digit categories
drop if substr(nace_r1_1,5,1) != ""			// drop all 4-digit cases
* Also drop rows that are already at the 2-digit level, as the Eurostat section
* does: they must not be averaged into the generated 2-digit value.
drop if substr(nace_r1_1,4,1) == ""
replace nace_r1_1 = substr(nace_r1_1,1,3)	// replace all to 2-digits
collapse dppi, by(nace_r1_1 year month)

***keep the 2 digit info only if it's not available directly as 2 digit info:
* Same rule as for the 3-digit step above. Without it a directly observed
* 2-digit code would show up twice per month after the append below, which
* double-counts it in the running sum and makes xtset stop with repeated time
* values. Both additions are no-ops when the direct data contain no 2-digit codes.
merge 1:1 nace_r1_1 year month using 1.2.3_PPI_BLS_helpfile_3.dta
keep if _merge==1
drop _merge
generate original = 0
compress
save 1.2.3.2_PPI_BLS_helpfile_add_2digit.dta, replace

***append all the info (including an "original" variable):
use 1.2.3_PPI_BLS_helpfile_3.dta, clear
generate original = 1
append using 1.2.3.1_PPI_BLS_helpfile_add_3digit.dta
append using 1.2.3.2_PPI_BLS_helpfile_add_2digit.dta

** transform in index
* running sum of the log changes per code in month order; sum() counts a missing
* change as zero, so every series starts at 100 and stays flat across gaps
bys nace_r1_1 (month): gen sum_logdif = sum(dppi)
gen ppi = 100*exp(sum_logdif)
* consistency check only: the log change of the rebuilt index should reproduce
* dppi; the three counts are printed to the log and not used otherwise
egen tmpid = group(nace_r1_1)
xtset tmpid month
gen dppitest = ln(ppi)-ln(l1.ppi)
count if dppi==dppitest
count if dppi!=dppitest
count if abs(dppi-dppitest) > 0.00001 & !mi(dppi)

drop tmpid dppitest sum_logdif dppi

***save as helpfile:
compress
save 1.2.4_PPI_BLS_helpfile_4.dta, replace

*-------------------------------------------------------------------------------------------------
*1.2.5: final edit to unify with other data - NAICS version
*-------------------------------------------------------------------------------------------------
use 1.2.2_PPI_BLS_helpfile_2.dta, clear

***constant country, source and (empty) currency columns:
generate country = "US"
generate source = 3
generate currency = .

***variable labels:
* NOTE(review): the source and currency labels are taken from globals that are
* not defined in the visible part of this file - confirm they are set elsewhere.
label variable country    "Country"
label variable naics_2002 "NAICS 2002"
label variable year       "Year"
label variable month      "Month (contin., 1960/01 = 0)"
label variable ppi        "PPI, monthly (monthly average)"
label variable original   "1 if original, 0 if generated"
label variable source     "$source"
label variable currency   "$currency"

*-----------------------------------------------------
compress
sort country naics_2002 year month
save 1.2_PPI_BLS_NAICS.dta, replace
*-----------------------------------------------------
*-----------------------------------------------------

*-------------------------------------------------------------------------------------------------
*1.2.6: final edit to unify with other data - NACE version
*-------------------------------------------------------------------------------------------------

use 1.2.4_PPI_BLS_helpfile_4.dta, clear

***constant country, source and (empty) currency columns:
generate country = "US"
generate source = 3
generate currency = .

***variable labels:
* NOTE(review): the source and currency labels are taken from globals that are
* not defined in the visible part of this file - confirm they are set elsewhere.
label variable country   "Country"
label variable nace_r1_1 "NACE rev. 1.1"
label variable year      "Year"
label variable month     "Month (contin., 1960/01 = 0)"
label variable ppi       "PPI, monthly (monthly average)"
label variable original  "1 if original, 0 if generated"
label variable source    "$source"
label variable currency  "$currency"

*-----------------------------------------------------
compress
sort country nace_r1_1 year month 
save 1.2_PPI_BLS_NACE.dta, replace
*-----------------------------------------------------
}
*end of BLS



************************************************************************************************************
************************************************************************************************************
************************************************************************************************************
*-----------------------------------------------------------------------------------------------------------
*1.3: PPI data Statistics Canada (SC)
*-----------------------------------------------------------------------------------------------------------
************************************************************************************************************
************************************************************************************************************
************************************************************************************************************
* Section switch: the Statistics Canada part below only runs when this global equals 1.
global STATSCAN = 1

if $STATSCAN == 1 {

*-------------------------------------------------------------------------------------------------
*1.3.1: insheet, edit
*-------------------------------------------------------------------------------------------------


***insheet:
clear
insheet using "$ppifolder\cansim7427889004903320481.csv", comma names

* the first 397 rows and the second column are not part of the data table
* NOTE(review): the row count is hard-coded for this particular download - confirm
* when the input file is replaced
drop in 1/397
drop v2

* The first remaining row holds the column headers. They are used as variable
* names with the prefix var_, because a Stata variable name may not start with a
* digit. The header row is dropped afterwards.
foreach col of varlist _all {
	local hdr = `col'[1]
	rename `col' var_`hdr'
}
drop in 1

***monthly date and year from the date column:
gen month = monthly(var_Monthly,"MY")
format month %tm
gen year = yofd(dofm(month))
drop var_Monthly

***one row per month and sector code:
reshape long var_, i(month) j(sc_sctr_code) string
rename var_ ppi
* ".." stands for a missing value; the characters c and x are stripped before the
* conversion to numeric
replace ppi = "." if ppi == ".."
destring ppi, replace ignore(c x)

*joinby sc_sctr_code using 1.3.1_conc_SCsectrcode_to_NAICS02_helpfile_2.dta, unm(master)


***save as helpfile:
compress

save 1.3.1_PPI_SC_helpfile_1.dta, replace

*-------------------------------------------------------------------------------------------------
*1.3.2: insheet naics-codes helpfile and merge
*-------------------------------------------------------------------------------------------------

* variables after this step: naics_2007 ppi year month naics_level original
*** mini concordance: sector code (v1), description (v2), NAICS 2007 code (v3)
insheet using "$ppifolder\canada_ppi_data_decription_naics2007.csv", clear delim(";") nod

rename v1 sc_sctr_code
rename v2 naics
* the numeric code is kept as a string so that its length gives the digit level
gen naics_2007 = string(v3)
drop v3
compress

***generate naics-levels:
* level = number of characters of the code, capped at 6; codes shorter than three
* characters get no level
generate naics_level = min(length(naics_2007), 6) if length(naics_2007) >= 3

***merge to helpfile from above:
*sort naics
* keep only the sector codes that have both a NAICS entry and price data
joinby sc_sctr_code using 1.3.1_PPI_SC_helpfile_1.dta, unmatched(using)
tabulate _merge
keep if _merge == 3
drop _merge naics sc_sctr_code
*tostring naics_2007, replace
*drop if naics_2007=="."

***generate a variable indicating whether the info is original or generated:
gen original = 1
compress
save 1.3.2_PPI_SC_helpfile_2.dta, replace

*-------------------------------------------------------------------------------------------------
*1.3.3: convert from NAICS 2007 to NACE rev. 1.1
*-------------------------------------------------------------------------------------------------

* Purpose: compute monthly log changes per NAICS 2007 series, map the 3- and
* 4-digit series to NACE rev. 1.1, average the log changes per NACE code and
* rebuild an index that starts at 100.
use 1.3.2_PPI_SC_helpfile_2.dta, clear

destring naics_2007, replace

* panel = one NAICS code; l1.ppi is the value of the previous month
xtset naics_2007 month, m
gen tmp_d_ppi = ln(ppi) -ln(l1.ppi)

* changes

***select data:
* note: a missing naics_level also satisfies >=5 in Stata, so rows without a
* level are dropped here as well
drop if naics_level>=5 // we can convert to 4-digit nace - I use the 4-to-4 correspondence, not the 5-to-4
/*
gen naics_max4d = naics_2 
replace naics_max4d = floor(naics_2/100) if naics_level == 6
replace naics_max4d = floor(naics_2/10) if naics_level == 5
gen tmp_collector = ppi if naics_max4d == naics_2
bysort naics_m month: egen tmp_spread = mean(tmp_collector)
*/

***convert to NACE rev. 1.1:
* both tables are joined with unmatched master, so rows without a match are kept
* and simply get an empty code from that table
sort naics_2007

joinby naics_2007 using "$concordancefolder\0.2.0.10_concordances_naics-2007-3digit_to_nace-r1-1-3digit.dta", unmatched(master)
rename nace_r1_1 nace_help_1
tabulate _merge
drop _merge factor
sort naics_2007
joinby naics_2007 using "$concordancefolder\0.2.0.7_concordances_naics-2007-4digit_to_nace-r1-1-4digit.dta", unmatched(master)
rename nace_r1_1 nace_help_2
tabulate _merge
drop _merge factor


***replace and collapse:
* every row takes the code from the table that belongs to its own digit level
gen nace_r1_1 = nace_help_1 if naics_level==3
replace nace_r1_1 = nace_help_2 if naics_level==4

* Rows that found no NACE code cannot be used. The BLS section drops them at the
* same point; without this line they are collapsed into a sector with an empty
* code that travels through the aggregation steps into the final datasets.
drop if nace_r1_1 == ""

collapse tmp_d_ppi, by(nace_r1_1 year month)

***rebuild the index from the average log changes:
* The month order inside each code is requested explicitly in the by-prefix, so
* the first-row rule and the running sum do not depend on a sort order left
* behind by an earlier command.
* First month of every code: log change set to ln(100), so the index starts at
* 100. sum() counts a missing change as zero, so the index stays flat across gaps.
bysort nace_r1_1 (month): replace tmp_d_ppi = ln(100) if _n == 1
bysort nace_r1_1 (month): gen ppi = exp(sum(tmp_d_ppi))
drop tmp_*

***save as helpfile:
compress
save 1.3.3_PPI_SC_helpfile_3.dta, replace

*-------------------------------------------------------------------------------------------------
*1.3.4: generate more aggregated info from less aggregated cases in NACE rev. 1.1
*-------------------------------------------------------------------------------------------------

* This section adds 3- and 2-digit NACE series aggregated from deeper levels.
* Code layout assumed by the substr tests: 4-digit codes have 5 characters
* (12.34), 3-digit codes 4 (12.3) and 2-digit codes at most 3.

***generate 3-digit info from 4 digit cases:
use 1.3.3_PPI_SC_helpfile_3.dta, clear
drop if substr(nace_r1_1,5,1) == ""			// keep the 4-digit cases only
replace nace_r1_1 = substr(nace_r1_1,1,4)	// replace all to 3-digits
collapse ppi, by(nace_r1_1 year month)

***keep the 3 digit info only if it's not available directly as 3 digit info: 
*can be the case as we've used a concordance
merge 1:1 nace_r1_1 year month using 1.3.3_PPI_SC_helpfile_3.dta
keep if _merge==1
drop _merge
generate original = 0
compress
save 1.3.3.1_PPI_SC_helpfile_add_3digit.dta, replace

***generate 2-digit info from 3 digit cases:
use 1.3.3_PPI_SC_helpfile_3.dta, clear
append using 1.3.3.1_PPI_SC_helpfile_add_3digit.dta
drop if substr(nace_r1_1,5,1) != ""			// drop all 4-digit cases
* Also drop rows that are not at the 3-digit level (already 2-digit or an empty
* code), as the Eurostat section does: they must not be averaged into the
* generated 2-digit value.
drop if substr(nace_r1_1,4,1) == ""
replace nace_r1_1 = substr(nace_r1_1,1,3)	// replace all to 2-digits
collapse ppi, by(nace_r1_1 year month)

***keep the 2 digit info only if it's not available directly as 2 digit info:
* Same rule as for the 3-digit step above. Without it a code that is already
* present in the direct data would appear twice per month after the append
* below, once as original and once as generated, and nothing downstream would
* flag the duplicate. Both additions are no-ops when the direct data contain
* 3- and 4-digit codes only.
merge 1:1 nace_r1_1 year month using 1.3.3_PPI_SC_helpfile_3.dta
keep if _merge==1
drop _merge
generate original = 0
compress
save 1.3.3.2_PPI_SC_helpfile_add_2digit.dta, replace

***append all the info (including an "original" variable):
use 1.3.3_PPI_SC_helpfile_3.dta, clear
generate original = 1
append using 1.3.3.1_PPI_SC_helpfile_add_3digit.dta
append using 1.3.3.2_PPI_SC_helpfile_add_2digit.dta

***save as helpfile:
compress
save 1.3.4_PPI_SC_helpfile_4.dta, replace

*-------------------------------------------------------------------------------------------------
*1.3.5: final edit to unify with other data - NAICS version
*-------------------------------------------------------------------------------------------------

use 1.3.2_PPI_SC_helpfile_2.dta, clear

***constant country, source and currency columns:
* NOTE(review): currency is 1 here, while the other final files in this script
* leave it missing - confirm this is intended
generate country = "CA"
generate source = 6
generate currency = 1

***variable labels:
* NOTE(review): the source and currency labels are taken from globals that are
* not defined in the visible part of this file - confirm they are set elsewhere.
label variable country    "Country"
label variable naics_2007 "NAICS 2007"
label variable year       "Year"
label variable month      "Month (contin., 1960/01 = 0)"
label variable ppi        "PPI, monthly (monthly average)"
label variable original   "1 if original, 0 if generated"
label variable source     "$source"
label variable currency   "$currency"

*-----------------------------------------------------
compress
sort country naics_2007 year month
save 1.3_PPI_SC_NAICS.dta, replace
*-----------------------------------------------------
*-----------------------------------------------------

*-------------------------------------------------------------------------------------------------
*1.3.6: final edit to unify with other data - NACE version
*-------------------------------------------------------------------------------------------------

use 1.3.4_PPI_SC_helpfile_4.dta, clear

***the NACE version starts in December 1994:
keep if month >= ym(1994,12)

***constant country, source and (empty) currency columns:
generate country = "CA"
generate source = 6
generate currency = .

***variable labels:
* NOTE(review): the source and currency labels are taken from globals that are
* not defined in the visible part of this file - confirm they are set elsewhere.
label variable country   "Country"
label variable nace_r1_1 "NACE rev. 1.1"
label variable year      "Year"
label variable month     "Month (contin., 1960/01 = 0)"
label variable ppi       "PPI, monthly (monthly average)"
label variable original  "1 if original, 0 if generated"
label variable source    "$source"
label variable currency  "$currency"

*-----------------------------------------------------
compress
sort country nace_r1_1 year month
save 1.3_PPI_SC_NACE.dta, replace
*-----------------------------------------------------
}
* end of stats canada


*-----------------------------------------------------------------------------------------------------------
*1.x: Append all Regional PPI Data
*-----------------------------------------------------------------------------------------------------------
* Stack the three regional NACE files. All three must exist in the working
* directory, also when one of the section switches above is turned off.
use 1.1_PPI_Eurostat_NACE.dta, clear
append using 1.2_PPI_BLS_NACE.dta
append using 1.3_PPI_SC_NACE.dta

*** show the continuous month as a monthly date
format month %tm
*drop if month < 240

*-----------------------------------------------------
compress
sort country nace_r1_1 year
save 1_PPI_AllRegions_NACE.dta, replace
*-----------------------------------------------------



**
* Create a 2digits NACE version only
**
clear
use 1_PPI_AllRegions_NACE.dta
* keep only observations with a (non-system-missing) PPI value
keep if ppi != .
* drop codes whose string length minus one exceeds 2, i.e. keep codes of at most three characters
drop if length(nace_r1_1) - 1 > 2
drop year
format month %tm
compress
sort country nace_r1_1 month
save 1_PPI_AllRegions_NACE_2D.dta, replace


*********************************************************
* ADDITIONALLY: LINEAR INTERPOLATION FOR QUARTERLY DATA
*********************************************************
* Purpose: some country/industry series are stored monthly but are constant within every
* quarter (or every year), i.e. they are really quarterly (annual) figures repeated over the months.
* For those series the value is kept only at the middle of the period (months 2, 5, 8, 11 for
* quarterly series, month 6 for annual series) and the other months are filled by linear
* interpolation/extrapolation. All other series are left untouched.
use 1_PPI_AllRegions_NACE_2D.dta, clear
drop original source currency


* Drop missing PPI's
drop if ppi==.

* Generate quarter and year variables (the monthly date itself is kept under the name "time")
format month %10.0g
gen date=dofm(month)
format date %d
rename month time
gen year=year(date)
gen month=month(date)
gen quarter=quarter(date)
drop date

* Identify quarterly data: check_q = 1 when the PPI does not vary within the calendar quarter.
* A quarter with a single observation has a missing standard deviation and therefore gets check_q = 0.
bysort country nace_r1_1 year quarter: egen ppi_sd_q=sd(ppi)
gen check_q=0
replace check_q=1 if ppi_sd_q<10^(-10) // precision problems, cannot set it to 0
drop ppi_sd_q

* Identify yearly data: check_a = 1 when the PPI does not vary within the calendar year.
bysort country nace_r1_1 year: egen ppi_sd_a=sd(ppi)
gen check_a=0
replace check_a=1 if ppi_sd_a<10^(-10) // precision problems, cannot set it to 0
drop ppi_sd_a

// The problem here is that the price index could simply be constant during a quarter/year, without necessarily indicating quarterly/yearly data.
// The idea we adopt here is that the price index needs to be constant every quarter/year for a given country and nace_r1_1 for it to be considered as an "originally quarterly/yearly" price index.

bysort country nace_r1_1: egen check_q_min=min(check_q)
bysort country nace_r1_1: egen check_a_min=min(check_a)

gen ppi_copy=ppi // in order to be able to subsequently check if the linear interpolation has worked

* Keep only the mid-period anchor observations.
* FIX: a series that is constant within every year is necessarily constant within every quarter,
* so annual series have check_q_min==1 as well as check_a_min==1. Applying the quarterly rule to
* them blanked month 6 (their only anchor), the annual rule then blanked every other month, and
* the interpolation returned missing values that overwrote the whole series. The quarterly rule
* is therefore restricted to series that are not flagged as annual.
replace ppi_copy=. if check_q_min==1 & check_a_min==0 & !inlist(month,2,5,8,11)
replace ppi_copy=. if check_a_min==1 & month!=6

* Linear interpolation between the anchors; epolate also fills the months before the first
* and after the last anchor by linear extrapolation
bysort country nace_r1_1 (time): ipolate ppi_copy time, gen(ppi_inter) epolate

* Only the series identified as quarterly or annual receive the interpolated values
replace ppi=ppi_inter if check_q_min==1 | check_a_min==1

drop year month quarter check* ppi_copy ppi_inter

format time %tm

rename time month
compress
save 1_PPI_AllRegions_NACE_2D_data_added_interpol.dta, replace




**********************************************************************************************************************
***Datastream
**********************************************************************************************************************
* This do file uses the manual conversion table from datastream series to the WIOD Code classification
* to convert the DS series.
*
*
global DATASTREAM 1
if $DATASTREAM == 1 {

set more off
clear
set mem 700m
set matsize 6500
set maxvar 32767

*build the isic -> wiod_code conversion table once; the country loops below join against it
import excel using "$ppifolder\isic_wiod_code_correspondance.xlsx", firstrow clear
compress
save temp_DS_manual_isicwiod.dta, replace


*first the quarterly ppi data
*For each country with quarterly Datastream series (currently only AU):
*  1. load the manual Datastream-code -> ISIC lookup sheet of the country,
*  2. read the wide csv export, reshape it to long format and attach the ISIC codes,
*  3. place each quarterly value on the middle month of its quarter and interpolate linearly,
*  4. compute monthly log changes, duplicate series mapped to several ISIC codes,
*  5. average the log changes by WIOD code and chain them into an index.
foreach cc in AU  {

	clear
	import excel using "$ppifolder\dsname_isic_manual_conversion_table.xlsx", sheet(`cc') firstrow
	*blank spreadsheet rows would make ds_code non-unique and break the m:1 merge below
	drop if mi(ds_code)
	*some country codes have special characters, so we change them.
	*The same three substitutions are applied to the csv headers below; they must be
	*identical on both sides, otherwise codes containing & or % can never be matched
	replace ds_code = subinstr(ds_code,".","9",.)
	replace ds_code = subinstr(ds_code,"&","_",.)
	replace ds_code = subinstr(ds_code,"%","8",.)
	compress
	save tempDSISIC.dta, replace



	*read the csv without variable names: the first row holds the Datastream codes
	clear
	insheet using "$ppifolder\DS_PPI_`cc'_q.csv", delim(",") non

	*rename every column to v + recoded Datastream code taken from the first row
	foreach dd of varlist _all {
		local newname = `dd'[1]
		*some country codes have special characters, so we change them
		local newname = subinstr("`newname'",".","9",.)
		local newname = subinstr("`newname'","&","_",.)
		local newname = subinstr("`newname'","%","8",.)
		
		rename `dd' v`newname'
	}

	rename vDATE date
	drop in 1
	drop if mi(date)

	*one row per date and series, then attach the ISIC code of each series
	reshape long v, i(date) j(ds_code) string
	rename v ppi
	replace ds_code = strupper(ds_code)
	merge m:1 ds_code using tempDSISIC.dta, keepusing(isic flag)
	drop _merge

	destring ppi, replace ignore("NA")
	
	if "`cc'" == "AU" {
		*generate the quarter variable: characters 1-4 of date are the year, 5-6 the quarter
		tostring date, replace
		gen year = substr(date,1,4)
		gen q = substr(date,5,2)
		destring year q, replace
		gen quarter = yq(year, q)
		format %tq quarter
		drop date
		
		*stub2 is the middle month of the quarter
		gen stub2 = 2 if q==1
		replace stub2 = 5 if q==2
		replace stub2 = 8 if q==3
		replace stub2 = 11 if q==4
		
		*expand to have monthly data: three copies per quarter, numbered 1 to 3
		sort quarter
		expand 3
		sort ds_code quarter
		bys ds_code quarter: gen counter = _n
		gen month = stub2 - 1 if counter==1
		replace month = stub2  if counter==2
		replace month = stub2 + 1 if counter==3
		
		gen date = ym(year,month)
		format %tm date
	}

	*keep only midquarter ppi; the first and third month of each quarter are interpolated below
	replace ppi = . if counter==1 | counter==3
	drop stub* quarter counter q

	encode ds_code, gen(id)

	xtset id date
	*interpolate ppi linearly between the midquarter values
	bys ds_code: ipolate ppi date, gen(ipoppi)
	replace ppi = ipoppi
	drop ipoppi
	*bys re-sorted the data, so declare the panel again before using the lag operator
	xtset id date
	gen dppi = ln(ppi) - ln(L1.ppi)


	*duplicate observation that concern more than one isic 2digit category
	*(isic holds up to three codes separated by an underscore)
	split isic, parse("_") gen(stub)

	capture expand 2 if stub3!="", gen(tag1)
	*if there is no error, then it means that there is a least one observation with
	*3 ISIC 2d
	if ! _rc {
		*so in that case, we replace the isic code by the 3rd category 
		replace stub1 = stub3 if tag1==1
		
		*then expand the 2nd category
		expand 2 if stub2!="" & tag1==0, gen(tag2)
		replace stub1 = stub2 if tag2==1		
	}
	*if there was an error, then proceed directly to 2nd category
	else {
		capture expand 2 if stub2!="", gen(tag2)
		if ! _rc {
			replace stub1 = stub2 if tag2==1
		}
	}

	replace isic = stub1
	drop if isic==""
	replace isic = strlower(isic)
	*no tag variable exists when no isic value contains an underscore, so a plain drop
	*would abort the do-file here; the monthly loop guards this the same way
	capture drop tag*
	drop stub*

	drop if isic=="d" // d is manufacturing, it is too aggregated..
	joinby isic using temp_DS_manual_isicwiod.dta, unm(master)
	tab _merge
	drop _merge
	
	*collapse by WIOD CODE: average log change of all series mapped to the same code
	collapse (mean) dppi, by(date year month wiod_code)

	*chain the log changes into an index; periods without a log change get no index value
	bys wiod_code (date): gen sumld = sum(dppi)
	gen ppi = 100*exp(sumld) if !mi(dppi)
	drop sumld
	drop dppi

	gen country = "`cc'"
	sort wiod_code date
	compress
	save 1.6_PPI_Datastream_manual_wiod_`cc', replace
}



*now monthly ppi data
*Same pipeline as for the quarterly data, without the interpolation step:
*lookup table -> wide csv to long -> log changes -> duplication over ISIC codes -> WIOD index
foreach ctry in BR ID IN JP KR MX RU TW  {

	display "PPI from `ctry'"

	*--- Datastream code -> ISIC lookup of the country, with the special characters recoded
	import excel using "$ppifolder\dsname_isic_manual_conversion_table.xlsx", sheet(`ctry') firstrow clear
	drop if mi(ds_code)
	destring flag, replace
	replace ds_code = subinstr(subinstr(subinstr(ds_code,".","9",.),"&","_",.),"%","8",.)
	compress
	save tempDSISIC.dta, replace

	*--- wide csv export; the first row carries the Datastream codes
	insheet using "$ppifolder\DS_PPI_`ctry'_m.csv", delim(",") non clear

	foreach col of varlist _all {
		local code = `col'[1]
		local code = subinstr(subinstr(subinstr("`code'",".","9",.),"&","_",.),"%","8",.)
		rename `col' v`code'
	}

	rename vDATE date
	drop in 1
	drop if mi(date)

	*--- long format, one row per date and series, plus ISIC code and index-base flag
	reshape long v, i(date) j(ds_code) string
	rename v ppi
	replace ds_code = strupper(ds_code)
	merge m:1 ds_code using tempDSISIC.dta, keepusing(isic flag) nogenerate

	destring ppi, replace ignore("NA")

	*--- monthly date: characters 1-4 of date are the year, characters 5-6 the month
	gen year = substr(date,1,4)
	gen month = substr(date,5,2)
	destring year month, replace
	drop date
	gen date = ym(year, month)
	format date %tm

	encode ds_code, gen(id)
	xtset id date

	*--- monthly log change
	*flag 0: ordinary index, difference of logs with the previous month
	gen dppi = ln(ppi) - ln(L1.ppi) if flag==0
	*flag 1: the ppi is reported as "previous period=100", for example for Russia
	replace dppi = ln(ppi) - ln(100) if flag==1

	*--- duplicate the observations that concern more than one isic 2digit category
	split isic, parse("_") gen(stub)

	*the expand only succeeds when stub3 exists, i.e. when some series has three ISIC codes
	capture expand 2 if stub3!="", gen(tag1)
	local three_codes = (_rc == 0)
	if `three_codes' {
		*the first copy takes the 3rd code, a second copy of the original rows takes the 2nd
		replace stub1 = stub3 if tag1==1
		expand 2 if stub2!="" & tag1==0, gen(tag2)
		replace stub1 = stub2 if tag2==1
	}
	else {
		*at most two codes per series; nothing happens when stub2 does not exist either
		capture expand 2 if stub2!="", gen(tag2)
		if _rc == 0 {
			replace stub1 = stub2 if tag2==1
		}
	}

	replace isic = strlower(stub1)
	capture drop tag*
	drop stub*
	*drop rows without an ISIC code and the aggregate d
	drop if isic=="" | isic=="d"

	joinby isic using temp_DS_manual_isicwiod.dta, unm(master)
	tab _merge
	drop _merge

	*--- average log change by WIOD code, chained into an index
	collapse (mean) dppi, by(date year month wiod_code)

	bys wiod_code (date): gen sumld = sum(dppi)
	gen ppi = 100*exp(sumld) if !mi(dppi)
	drop sumld dppi

	gen country = "`ctry'"
	sort wiod_code date
	compress
	save 1.6_PPI_Datastream_manual_wiod_`ctry', replace
}


*China: special case where the series are reported as current period previous year=100 (CPPY=100)
*and (only for the end of the sample) previous period = 100 (PP=100)
*Here I combine the PP=100 series with the CPPY=100 series to have a full monthly series
*Using the PP=100 series for 2014, one can know the change from december 2013 to november 2014.
*Using the CPPY=100 series, one can know the change from november 2013 to november 2014
*So one can reconstruct the change from november 2013 to december 2013, by taking 
*the YoY change (nov13-nov14) and taking out the sum of monthly changes from december 2013 to november 2014
*Then recursively you can get the monthly change all the way to the beginning of the sample
*
*In the lookup sheet, flag==1 marks the PP=100 series and flag==3 the CPPY=100 series;
*id_match is the key that pairs a CPPY=100 series with its PP=100 counterpart.
foreach cc in CN {
	clear
	import excel using "$ppifolder\dsname_isic_manual_conversion_table.xlsx", sheet(`cc') firstrow

	*blank spreadsheet rows would make ds_code non-unique and break the m:1 merge below
	drop if mi(ds_code)
	*apply the same three substitutions as to the csv headers below, otherwise codes
	*containing & or % can never be matched
	replace ds_code = subinstr(ds_code,".","9",.)
	replace ds_code = subinstr(ds_code,"&","_",.)
	replace ds_code = subinstr(ds_code,"%","8",.)
	compress
	save tempDSISIC.dta, replace



	*read the csv without variable names: the first row holds the Datastream codes
	clear
	insheet using "$ppifolder\DS_PPI_`cc'_m.csv", delim(",") non

	foreach dd of varlist _all {
		local newname = `dd'[1]
		local newname = subinstr("`newname'",".","9",.)
		local newname = subinstr("`newname'","&","_",.)
		local newname = subinstr("`newname'","%","8",.)
		
		rename `dd' v`newname'
	}

	rename vDATE date

	drop in 1
	drop if mi(date)
	reshape long v, i(date) j(ds_code) string
	rename v ppi

	replace ds_code = strupper(ds_code)
	*id_match is spelled out in full: the former abbreviation id only resolved to id_match
	*through variable-name abbreviation and fails under set varabbrev off
	merge m:1 ds_code using tempDSISIC.dta, keepusing(isic flag id_match)
	drop _merge

	

	destring ppi, replace ignore("NA")

	*generate the month variable: characters 1-4 of date are the year, 5-6 the month
	tostring date, replace
	gen stub3 = substr(date,1,4)
	gen stub2 = substr(date,5,2)
	destring stub*, replace
	drop date
	gen date = ym(stub3, stub2)
	format %tm date
	rename stub3 year
	rename stub2 month
	capture drop stub*
	
	compress
	save temp_CN.dta, replace
	
	***
	*Save a file with the monthly change in 2014
	keep if date>=ym(2014,1) & date<=ym(2014,12)
	keep if flag == 1
	drop if mi(id_match)
	drop isic 
	drop flag
	*xtset also verifies that id_match and month identify the observations uniquely,
	*which the m:1 merge below relies on
	xtset id_match month
	
	*the log change from a month to an other is ln(ppi) - ln(100) since PP=100
	gen dppi_2014 = ln(ppi) - ln(100)
	drop ppi
	compress
	save temp_2014_CN.dta, replace
	***
	
	***
	* Reconstruct the monthly changes for the series that are in CPPY=100
	clear
	use temp_CN.dta
	keep if flag==3
	drop if isic==""
	*merge with the monthly log change in 2014 (matched on calendar month, so the 2014 change
	*of a given month is attached to that month in every year)
	merge m:1 id_match month using temp_2014_CN.dta
	drop if _merge !=3
	drop _merge
	
	
	*compute the YoY 12 month change
	gen dppi_yoy = ln(ppi) - ln(100)
	
	drop if year>2014
	
	*sort the data by series, from 2014 to the past
	encode ds_code, gen(id_serie)
	gen minusyear = -year
	gen minusmonth = -month
	sort id_serie minusyear minusmonth
	
	*the dppi_mom is going to be the monthly change
	gen dppi_mom = .
	*start with 2014, where the values are given from the temp_2014_CN.dta file
	replace dppi_mom = dppi_2014 if year==2014
	*remember the data is sorted by series, and then towards the past
	*so the first observations within a series are for 2014, then for the past
	*We go along the whole sample with a loop; the order matters because every step
	*uses the eleven values computed just before it
	forvalues i = 1/ `=_N' {
		*skip the 2014 observations
		if year[`i'] != 2014 {	
			*compute the monthly change as the YoY change from i to i-11 (which corresponds
			*to the change from time t to t+11, remember the sorting)
			*Example: for december 2013, dppi_yoy[`i'-11] is the dppi_yoy for november
			*2014, that is the change from november 2013 to november 2014.
			*To recover the monthly change in december 2013 (that is the change from november 2013
			*to december 2013 (dppi is defined as ln(ppi)-ln(l1.ppi))), we take the nov13 to nov14 change
			*and remove the monthly changes dec13-jan14 (that is dppi_mom[`i'-1], the dppi_mom for jan14)
			*then jan14-feb14 (dppi_mom[`i'-2]) etc. until the monthly change oct14-nov14 (dppi_mom[`i'-11])
			qui: replace dppi_mom = dppi_yoy[`i'-11]- dppi_mom[`i'-11]-dppi_mom[`i'-10] ///
				-dppi_mom[`i'-9]-dppi_mom[`i'-8]-dppi_mom[`i'-7]-dppi_mom[`i'-6] ///
				-dppi_mom[`i'-5]-dppi_mom[`i'-4]-dppi_mom[`i'-3] ///
				-dppi_mom[`i'-2]-dppi_mom[`i'-1] if id_serie[`i'] == id_serie[`i'-11] in `i'
				* the condition if id_serie[`i'] == id_serie[`i'-11] should be superfluous, but I leave it to be sure
		}
	}

	
	xtset id_serie date
	*compute the 12 month change to check that the computation was made correctly
	gen dppi_12 = dppi_mom+l1.dppi_mom+l2.dppi_mom+l3.dppi_mom+l4.dppi_mom ///
		+l5.dppi_mom+l6.dppi_mom+l7.dppi_mom+l8.dppi_mom+l9.dppi_mom ///
		+l10.dppi_mom+l11.dppi_mom
		
	*this should be exactly 1, or very very close to 1 for december 2014
	corr dppi_12 dppi_yoy 

	*save the relevant variables
	keep ds_code flag isic date dppi_mom year month
	compress
	save temp_CN_flag3.dta, replace // these are the series that are CPPY=100 and were reconstructed
	
	*from here on the reconstructed monthly change plays the role of dppi, as in the other
	*country loops. The explicit rename (made after the save above, so that the saved file is
	*unchanged) replaces the former reliance on dppi being accepted as an abbreviation of dppi_mom
	rename dppi_mom dppi

	
	*duplicate observation that concern more than one isic 2digit category
	*(isic holds up to three codes separated by an underscore)
	split isic, parse("_") gen(stub)

	capture expand 2 if stub3!="", gen(tag1)
	*if there is no error, then it means that there is a least one observation with
	*3 ISIC 2d
	if ! _rc {
		*so in that case, we replace the isic code by the 3rd category 
		replace stub1 = stub3 if tag1==1
		
		*then expand the 2nd category
		expand 2 if stub2!="" & tag1==0, gen(tag2)
		replace stub1 = stub2 if tag2==1		
	}
	*if there was an error, then proceed directly to 2nd category
	else {
		capture expand 2 if stub2!="", gen(tag2)
		if ! _rc {
			replace stub1 = stub2 if tag2==1
		}
	}

	replace isic = stub1
	drop if isic==""
	replace isic = strlower(isic)
	*no tag variable exists when no isic value contains an underscore, so a plain drop
	*would abort the do-file here; the monthly loop guards this the same way
	capture drop tag*
	drop stub*

	drop if isic=="d"
	joinby isic using temp_DS_manual_isicwiod.dta, unm(master)
	tab _merge
	drop _merge
	
	*collapse by WIOD CODE: average log change of all series mapped to the same code
	collapse (mean) dppi, by(date wiod_code year month)

	*chain the log changes into an index; periods without a log change get no index value
	bys wiod_code (date): gen sumld = sum(dppi)
	gen ppi = 100*exp(sumld) if !mi(dppi)
	drop sumld
	drop dppi

	gen country = "`cc'"
	sort wiod_code date
	compress
	save 1.6_PPI_Datastream_manual_wiod_`cc', replace
}


*stack the country files produced above, AU first, then the other countries in the listed order
clear
use 1.6_PPI_Datastream_manual_wiod_AU
foreach ctry in CN BR ID IN JP KR MX RU TW {
	append using 1.6_PPI_Datastream_manual_wiod_`ctry'
}
*keep only the periods for which an index value exists
keep if !mi(ppi)
rename ppi ppi_manual
compress
save 1.6_PPI_Datastream_manual_wiod.dta, replace


	
}
* end of DATASTREAM

**************************
** IMF-IFS Country PPI Aggregates
**************************

global IMFAGG = 1

if $IMFAGG == 1 {

*read the IMF-IFS workbook as is: the columns are named A, B, ... and every row is data
import excel using "$ppifolder\IMFIFS_ppi.xlsx", clear

*discard the first sheet row and column IY
drop in 1
drop IY

*the row that is now first holds the period of each column (month and year);
*prefix it with m and use it as the variable name, blanks removed.
*Columns whose first cell is empty (or a numeric missing) keep their letter name
foreach col of varlist _all {
	local header = `col'[1]
	if !("`header'" == "" | "`header'" == ".") {
		local header = subinstr("`header'"," ","",.)
		rename `col' m`header'
	}
}
rename A country

*the header row is no longer needed
drop in 1

destring m*, replace

*one row per country and period
reshape long m, i(country) j(date1) string
rename m ppi_agg

*date1 is made of a three-letter month followed by a four-digit year
gen year = substr(date1,4,4)
gen mo = substr(date1,1,3)

*translate the month abbreviation into its number
gen month = .
local monthnum = 0
foreach abbr in Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec {
	local ++monthnum
	replace month = `monthnum' if mo == "`abbr'"
}

destring year month, replace
gen date = ym(year, month)
drop mo date1

*two- and three-letter country codes; countries not listed below keep empty codes
gen iso2 = ""
gen iso3 = ""

replace iso2 = "AU"  if country == "Australia"
replace iso3 = "AUS" if country == "Australia"
replace iso2 = "AT"  if country == "Austria"
replace iso3 = "AUT" if country == "Austria"
replace iso2 = "BE"  if country == "Belgium"
replace iso3 = "BEL" if country == "Belgium"
replace iso2 = "BG"  if country == "Bulgaria"
replace iso3 = "BGR" if country == "Bulgaria"
replace iso2 = "CA"  if country == "Canada"
replace iso3 = "CAN" if country == "Canada"
replace iso2 = "CZ"  if country == "Czech Republic"
replace iso3 = "CZE" if country == "Czech Republic"
replace iso2 = "DK"  if country == "Denmark"
replace iso3 = "DNK" if country == "Denmark"
replace iso2 = "FI"  if country == "Finland"
replace iso3 = "FIN" if country == "Finland"
replace iso2 = "FR"  if country == "France"
replace iso3 = "FRA" if country == "France"
replace iso2 = "DE"  if country == "Germany"
replace iso3 = "DEU" if country == "Germany"
replace iso2 = "GR"  if country == "Greece"
replace iso3 = "GRC" if country == "Greece"
replace iso2 = "HU"  if country == "Hungary"
replace iso3 = "HUN" if country == "Hungary"
replace iso2 = "IE"  if country == "Ireland"
replace iso3 = "IRL" if country == "Ireland"
replace iso2 = "IT"  if country == "Italy"
replace iso3 = "ITA" if country == "Italy"
replace iso2 = "JP"  if country == "Japan"
replace iso3 = "JPN" if country == "Japan"
replace iso2 = "KR"  if country == "Korea, Republic of"
replace iso3 = "KOR" if country == "Korea, Republic of"
replace iso2 = "LT"  if country == "Lithuania"
replace iso3 = "LTU" if country == "Lithuania"
replace iso2 = "MX"  if country == "Mexico"
replace iso3 = "MEX" if country == "Mexico"
replace iso2 = "NL"  if country == "Netherlands"
replace iso3 = "NLD" if country == "Netherlands"
replace iso2 = "PL"  if country == "Poland"
replace iso3 = "POL" if country == "Poland"
replace iso2 = "PT"  if country == "Portugal"
replace iso3 = "PRT" if country == "Portugal"
replace iso2 = "RO"  if country == "Romania"
replace iso3 = "ROM" if country == "Romania"
replace iso2 = "SI"  if country == "Slovenia"
replace iso3 = "SVN" if country == "Slovenia"
replace iso2 = "ES"  if country == "Spain"
replace iso3 = "ESP" if country == "Spain"
replace iso2 = "SE"  if country == "Sweden"
replace iso3 = "SWE" if country == "Sweden"
replace iso2 = "GB"  if country == "United Kingdom"
replace iso3 = "GBR" if country == "United Kingdom"
replace iso2 = "US"  if country == "United States"
replace iso3 = "USA" if country == "United States"

compress
save 1.5_PPI_countrylevelIFS.dta, replace

}
*end of imf aggregate

* Next: leave the PPI output folder and make the data folder under $rootfolder the working directory
cd "$rootfolder\data"
